{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Enter State Farm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using gpu device 0: GeForce GTX TITAN X (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 4007)\n"
     ]
    }
   ],
   "source": [
    "from theano.sandbox import cuda\n",
    "cuda.use('gpu0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "from __future__ import print_function, division\n",
    "path = \"data/state/\"\n",
    "#path = \"data/state/sample/\"\n",
    "import utils; reload(utils)\n",
    "from utils import *\n",
    "from IPython.display import FileLink"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "batch_size=64"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Setup batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n",
      "Found 3478 images belonging to 10 classes.\n"
     ]
    }
   ],
   "source": [
    "batches = get_batches(path+'train', batch_size=batch_size)\n",
    "val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n",
      "Found 3478 images belonging to 10 classes.\n",
      "Found 79726 images belonging to 1 classes.\n"
     ]
    }
   ],
   "source": [
    "(val_classes, trn_classes, val_labels, trn_labels, \n",
    "    val_filenames, filenames, test_filenames) = get_classes(path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Rather than using batches, we could just import all the data into an array to save some processing time. (In most examples I'm using the batches, however - just because that's how I happened to start out.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m-----------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-53-b0afbfb481fe>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtrn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;34m'train'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mval\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m+\u001b[0m\u001b[1;34m'valid'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/data/jhoward/fast-image/nbs/utils.pyc\u001b[0m in \u001b[0;36mget_data\u001b[1;34m(path)\u001b[0m\n\u001b[0;32m     80\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mget_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     81\u001b[0m     \u001b[0mbatches\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_batches\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mclass_mode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 82\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mbatches\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnext\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbatches\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnb_sample\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     83\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     84\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py\u001b[0m in \u001b[0;36mnext\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    605\u001b[0m             \u001b[0mimg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mload_img\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdirectory\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgrayscale\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mgrayscale\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtarget_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    606\u001b[0m             \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mimg_to_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdim_ordering\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdim_ordering\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 607\u001b[1;33m             \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimage_data_generator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrandom_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    608\u001b[0m             \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimage_data_generator\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstandardize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    609\u001b[0m             \u001b[0mbatch_x\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py\u001b[0m in \u001b[0;36mrandom_transform\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m    364\u001b[0m         \u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mimg_row_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mimg_col_index\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    365\u001b[0m         \u001b[0mtransform_matrix\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtransform_matrix_offset_center\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtransform_matrix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 366\u001b[1;33m         \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mapply_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtransform_matrix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mimg_channel_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfill_mode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfill_mode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcval\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcval\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    367\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    368\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchannel_shift_range\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py\u001b[0m in \u001b[0;36mapply_transform\u001b[1;34m(x, transform_matrix, channel_index, fill_mode, cval)\u001b[0m\n\u001b[0;32m    104\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtransform_matrix\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchannel_index\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfill_mode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'nearest'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcval\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    105\u001b[0m     \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrollaxis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchannel_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 106\u001b[1;33m     \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcv2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwarpAffine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtransform_matrix\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    107\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m==\u001b[0m\u001b[1;36m2\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mexpand_dims\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    108\u001b[0m     \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrollaxis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m2\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mchannel_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "trn = get_data(path+'train')\n",
    "val = get_data(path+'valid')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "save_array(path+'results/val.dat', val)\n",
    "save_array(path+'results/trn.dat', trn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "val = load_array(path+'results/val.dat')\n",
    "trn = load_array(path+'results/trn.dat')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Re-run sample experiments on full dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We should find that everything that worked on the sample (see statefarm-sample.ipynb), works on the full dataset too. Only better! Because now we have more data. So let's see how they go - the models in this section are exact copies of the sample notebook models."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Single conv layer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def conv1(batches):\n",
    "    model = Sequential([\n",
    "            BatchNormalization(axis=1, input_shape=(3,224,224)),\n",
    "            Convolution2D(32,3,3, activation='relu'),\n",
    "            BatchNormalization(axis=1),\n",
    "            MaxPooling2D((3,3)),\n",
    "            Convolution2D(64,3,3, activation='relu'),\n",
    "            BatchNormalization(axis=1),\n",
    "            MaxPooling2D((3,3)),\n",
    "            Flatten(),\n",
    "            Dense(200, activation='relu'),\n",
    "            BatchNormalization(),\n",
    "            Dense(10, activation='softmax')\n",
    "        ])\n",
    "\n",
    "    model.compile(Adam(lr=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])\n",
    "    model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n",
    "                     nb_val_samples=val_batches.nb_sample)\n",
    "    model.optimizer.lr = 0.001\n",
    "    model.fit_generator(batches, batches.nb_sample, nb_epoch=4, validation_data=val_batches, \n",
    "                     nb_val_samples=val_batches.nb_sample)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/2\n",
      "18946/18946 [==============================] - 114s - loss: 0.2273 - acc: 0.9405 - val_loss: 2.4946 - val_acc: 0.2826\n",
      "Epoch 2/2\n",
      "18946/18946 [==============================] - 114s - loss: 0.0120 - acc: 0.9990 - val_loss: 1.5872 - val_acc: 0.5253\n",
      "Epoch 1/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.0093 - acc: 0.9992 - val_loss: 1.4836 - val_acc: 0.5825\n",
      "Epoch 2/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.0032 - acc: 1.0000 - val_loss: 1.3142 - val_acc: 0.6162\n",
      "Epoch 3/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.0035 - acc: 0.9996 - val_loss: 1.5061 - val_acc: 0.5771\n",
      "Epoch 4/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.0036 - acc: 0.9997 - val_loss: 1.4528 - val_acc: 0.5808\n"
     ]
    }
   ],
   "source": [
    "model = conv1(batches)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Interestingly, with no regularization or augmentation we're getting some reasonable results from our simple convolutional model. So with augmentation, we hopefully will see some very good results."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Data augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n"
     ]
    }
   ],
   "source": [
    "gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, \n",
    "                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)\n",
    "batches = get_batches(path+'train', gen_t, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/2\n",
      "18946/18946 [==============================] - 114s - loss: 1.2804 - acc: 0.5891 - val_loss: 2.0614 - val_acc: 0.3407\n",
      "Epoch 2/2\n",
      "18946/18946 [==============================] - 114s - loss: 0.6716 - acc: 0.7916 - val_loss: 1.3377 - val_acc: 0.6208\n",
      "Epoch 1/4\n",
      "18946/18946 [==============================] - 115s - loss: 0.4787 - acc: 0.8594 - val_loss: 1.2230 - val_acc: 0.6228\n",
      "Epoch 2/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.3724 - acc: 0.8931 - val_loss: 1.3030 - val_acc: 0.6282\n",
      "Epoch 3/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.3086 - acc: 0.9162 - val_loss: 1.1986 - val_acc: 0.7119\n",
      "Epoch 4/4\n",
      "18946/18946 [==============================] - 114s - loss: 0.2612 - acc: 0.9283 - val_loss: 1.4794 - val_acc: 0.5799\n"
     ]
    }
   ],
   "source": [
    "model = conv1(batches)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.2391 - acc: 0.9361 - val_loss: 1.2511 - val_acc: 0.6886\n",
      "Epoch 2/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.2075 - acc: 0.9430 - val_loss: 1.1327 - val_acc: 0.7294\n",
      "Epoch 3/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1800 - acc: 0.9529 - val_loss: 1.1099 - val_acc: 0.7294\n",
      "Epoch 4/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1675 - acc: 0.9557 - val_loss: 1.0660 - val_acc: 0.7363\n",
      "Epoch 5/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1432 - acc: 0.9625 - val_loss: 1.1585 - val_acc: 0.7073\n",
      "Epoch 6/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1358 - acc: 0.9627 - val_loss: 1.1389 - val_acc: 0.6947\n",
      "Epoch 7/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1283 - acc: 0.9665 - val_loss: 1.1329 - val_acc: 0.7369\n",
      "Epoch 8/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1180 - acc: 0.9686 - val_loss: 1.1817 - val_acc: 0.7194\n",
      "Epoch 9/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1137 - acc: 0.9704 - val_loss: 1.0923 - val_acc: 0.7142\n",
      "Epoch 10/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1076 - acc: 0.9720 - val_loss: 1.0983 - val_acc: 0.7358\n",
      "Epoch 11/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.1032 - acc: 0.9736 - val_loss: 1.0206 - val_acc: 0.7458\n",
      "Epoch 12/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.0956 - acc: 0.9740 - val_loss: 0.9039 - val_acc: 0.7809\n",
      "Epoch 13/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.0962 - acc: 0.9740 - val_loss: 1.3386 - val_acc: 0.6587\n",
      "Epoch 14/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.0892 - acc: 0.9777 - val_loss: 1.1150 - val_acc: 0.7470\n",
      "Epoch 15/15\n",
      "18946/18946 [==============================] - 114s - loss: 0.0886 - acc: 0.9773 - val_loss: 1.9190 - val_acc: 0.5802\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f3b6b66f610>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.optimizer.lr = 0.0001\n",
    "model.fit_generator(batches, batches.nb_sample, nb_epoch=15, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.nb_sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "I'm shocked by *how* good these results are! We're regularly seeing 75-80% accuracy on the validation set, which puts us into the top third or better of the competition. With such a simple model and no dropout or semi-supervised learning, this really speaks to the power of this approach to data augmentation."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Four conv/pooling pairs + dropout"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Unfortunately, the results are still very unstable - the validation accuracy jumps from epoch to epoch. Perhaps a deeper model with some dropout would help."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n"
     ]
    }
   ],
   "source": [
    "gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, \n",
    "                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)\n",
    "batches = get_batches(path+'train', gen_t, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model = Sequential([\n",
    "        BatchNormalization(axis=1, input_shape=(3,224,224)),\n",
    "        Convolution2D(32,3,3, activation='relu'),\n",
    "        BatchNormalization(axis=1),\n",
    "        MaxPooling2D(),\n",
    "        Convolution2D(64,3,3, activation='relu'),\n",
    "        BatchNormalization(axis=1),\n",
    "        MaxPooling2D(),\n",
    "        Convolution2D(128,3,3, activation='relu'),\n",
    "        BatchNormalization(axis=1),\n",
    "        MaxPooling2D(),\n",
    "        Flatten(),\n",
    "        Dense(200, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(0.5),\n",
    "        Dense(200, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(0.5),\n",
    "        Dense(10, activation='softmax')\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model.compile(Adam(lr=10e-5), loss='categorical_crossentropy', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/2\n",
      "18946/18946 [==============================] - 159s - loss: 2.6578 - acc: 0.2492 - val_loss: 1.8681 - val_acc: 0.3844\n",
      "Epoch 2/2\n",
      "18946/18946 [==============================] - 158s - loss: 1.8098 - acc: 0.4334 - val_loss: 1.3152 - val_acc: 0.5670\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f227f103ad0>"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit_generator(batches, batches.nb_sample, nb_epoch=2, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.nb_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model.optimizer.lr=0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "18946/18946 [==============================] - 159s - loss: 1.4232 - acc: 0.5405 - val_loss: 1.0877 - val_acc: 0.6452\n",
      "Epoch 2/10\n",
      "18946/18946 [==============================] - 159s - loss: 1.1155 - acc: 0.6346 - val_loss: 1.2730 - val_acc: 0.6878\n",
      "Epoch 3/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.9043 - acc: 0.7025 - val_loss: 1.1393 - val_acc: 0.6354\n",
      "Epoch 4/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.7444 - acc: 0.7529 - val_loss: 1.1037 - val_acc: 0.7087\n",
      "Epoch 5/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.6299 - acc: 0.7955 - val_loss: 0.9123 - val_acc: 0.7455\n",
      "Epoch 6/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.5220 - acc: 0.8275 - val_loss: 1.0418 - val_acc: 0.7484\n",
      "Epoch 7/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.4686 - acc: 0.8495 - val_loss: 1.2907 - val_acc: 0.6599\n",
      "Epoch 8/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.4190 - acc: 0.8653 - val_loss: 1.1321 - val_acc: 0.6906\n",
      "Epoch 9/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.3735 - acc: 0.8802 - val_loss: 1.1235 - val_acc: 0.7458\n",
      "Epoch 10/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.3226 - acc: 0.8969 - val_loss: 1.2040 - val_acc: 0.7343\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f227f104d10>"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit_generator(batches, batches.nb_sample, nb_epoch=10, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.nb_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "model.optimizer.lr=0.00001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.3183 - acc: 0.8976 - val_loss: 1.0359 - val_acc: 0.7688\n",
      "Epoch 2/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.2788 - acc: 0.9109 - val_loss: 1.5806 - val_acc: 0.6705\n",
      "Epoch 3/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.2810 - acc: 0.9124 - val_loss: 0.9836 - val_acc: 0.7887\n",
      "Epoch 4/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.2403 - acc: 0.9244 - val_loss: 1.1832 - val_acc: 0.7493\n",
      "Epoch 5/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.2195 - acc: 0.9303 - val_loss: 1.1524 - val_acc: 0.7510\n",
      "Epoch 6/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.2085 - acc: 0.9359 - val_loss: 1.2245 - val_acc: 0.7415\n",
      "Epoch 7/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.1961 - acc: 0.9399 - val_loss: 1.1232 - val_acc: 0.7654\n",
      "Epoch 8/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.1851 - acc: 0.9416 - val_loss: 1.0956 - val_acc: 0.6892\n",
      "Epoch 9/10\n",
      "18946/18946 [==============================] - 158s - loss: 0.1798 - acc: 0.9451 - val_loss: 1.0586 - val_acc: 0.7740\n",
      "Epoch 10/10\n",
      "18946/18946 [==============================] - 159s - loss: 0.1669 - acc: 0.9471 - val_loss: 1.4633 - val_acc: 0.6656\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f227f104ed0>"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit_generator(batches, batches.nb_sample, nb_epoch=10, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.nb_sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "This is looking quite a bit better - the accuracy is similar, but the stability is higher. There's still some way to go however..."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imagenet conv features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Since we have so little data, and it is similar to imagenet images (full color photos), using pre-trained VGG weights is likely to be helpful - in fact it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all. So we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout. (However this means that we can't use full data augmentation, since we can't pre-compute something that changes every image.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "vgg = Vgg16()\n",
    "model=vgg.model\n",
    "last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]\n",
    "conv_layers = model.layers[:last_conv_idx+1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "conv_model = Sequential(conv_layers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# batches shuffle must be set to False when pre-computing features\n",
    "batches = get_batches(path+'train', batch_size=batch_size, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n",
      "Found 3478 images belonging to 10 classes.\n",
      "Found 79726 images belonging to 1 classes.\n"
     ]
    }
   ],
   "source": [
    "(val_classes, trn_classes, val_labels, trn_labels, \n",
    "    val_filenames, filenames, test_filenames) = get_classes(path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "conv_feat = conv_model.predict_generator(batches, batches.nb_sample)\n",
    "conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)\n",
    "conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "save_array(path+'results/conv_val_feat.dat', conv_val_feat)\n",
    "save_array(path+'results/conv_test_feat.dat', conv_test_feat)\n",
    "save_array(path+'results/conv_feat.dat', conv_feat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3478, 512, 14, 14)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conv_feat = load_array(path+'results/conv_feat.dat')\n",
    "conv_val_feat = load_array(path+'results/conv_val_feat.dat')\n",
    "conv_val_feat.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Batchnorm dense layers on pretrained conv layers"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Since we've pre-computed the output of the last convolutional layer, we need to create a network that takes that as input, and predicts our 10 classes. Let's try using a simplified version of VGG's dense layers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def get_bn_layers(p):\n",
    "    return [\n",
    "        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),\n",
    "        Flatten(),\n",
    "        Dropout(p/2),\n",
    "        Dense(128, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(p/2),\n",
    "        Dense(128, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(p),\n",
    "        Dense(10, activation='softmax')\n",
    "        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "p=0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model = Sequential(get_bn_layers(p))\n",
    "bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 18946 samples, validate on 3478 samples\n",
      "Epoch 1/1\n",
      "18946/18946 [==============================] - 3s - loss: 1.5894 - acc: 0.5625 - val_loss: 0.7031 - val_acc: 0.7522\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdfd921a690>"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=1, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.optimizer.lr=0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 18946 samples, validate on 3478 samples\n",
      "Epoch 1/2\n",
      "18946/18946 [==============================] - 3s - loss: 0.2870 - acc: 0.9109 - val_loss: 0.7728 - val_acc: 0.7683\n",
      "Epoch 2/2\n",
      "18946/18946 [==============================] - 3s - loss: 0.1422 - acc: 0.9594 - val_loss: 0.7576 - val_acc: 0.7936\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdfd921a8d0>"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(conv_feat, trn_labels, batch_size=batch_size, nb_epoch=2, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.save_weights(path+'models/conv8.h5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Looking good! Let's try pre-computing 5 epochs worth of augmented data, so we can experiment with combining dropout and augmentation on the pre-trained model."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Pre-computed data augmentation + dropout"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "We'll use our usual data augmentation parameters:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 18946 images belonging to 10 classes.\n"
     ]
    }
   ],
   "source": [
    "gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, \n",
    "                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)\n",
    "da_batches = get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "We use those to create a dataset of convolutional features 5x bigger than the training set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "save_array(path+'results/da_conv_feat2.dat', da_conv_feat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "da_conv_feat = load_array(path+'results/da_conv_feat2.dat')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Let's include the real training data as well in its non-augmented form."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "da_conv_feat = np.concatenate([da_conv_feat, conv_feat])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Since we've now got a dataset 6x bigger than before, we'll need to copy our labels 6 times too."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "da_trn_labels = np.concatenate([trn_labels]*6)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Based on some experiments the previous model works well, with bigger dense layers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def get_bn_da_layers(p):\n",
    "    return [\n",
    "        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),\n",
    "        Flatten(),\n",
    "        Dropout(p),\n",
    "        Dense(256, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(p),\n",
    "        Dense(256, activation='relu'),\n",
    "        BatchNormalization(),\n",
    "        Dropout(p),\n",
    "        Dense(10, activation='softmax')\n",
    "        ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "p=0.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model = Sequential(get_bn_da_layers(p))\n",
    "bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Now we can train the model as usual, with pre-computed augmented data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 241,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 113676 samples, validate on 3478 samples\n",
      "Epoch 1/1\n",
      "113676/113676 [==============================] - 16s - loss: 1.5848 - acc: 0.5068 - val_loss: 0.6340 - val_acc: 0.8131\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd886a7c90>"
      ]
     },
     "execution_count": 241,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=1, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.optimizer.lr=0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 243,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 113676 samples, validate on 3478 samples\n",
      "Epoch 1/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.6652 - acc: 0.7785 - val_loss: 0.6343 - val_acc: 0.8082\n",
      "Epoch 2/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.5247 - acc: 0.8318 - val_loss: 0.6951 - val_acc: 0.8085\n",
      "Epoch 3/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.4553 - acc: 0.8544 - val_loss: 0.6067 - val_acc: 0.8189\n",
      "Epoch 4/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.4127 - acc: 0.8686 - val_loss: 0.7701 - val_acc: 0.7915\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd88642490>"
      ]
     },
     "execution_count": 243,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.optimizer.lr=0.0001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 245,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 113676 samples, validate on 3478 samples\n",
      "Epoch 1/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.3837 - acc: 0.8775 - val_loss: 0.6904 - val_acc: 0.8197\n",
      "Epoch 2/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.3576 - acc: 0.8872 - val_loss: 0.6593 - val_acc: 0.8209\n",
      "Epoch 3/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.3384 - acc: 0.8939 - val_loss: 0.7057 - val_acc: 0.8085\n",
      "Epoch 4/4\n",
      "113676/113676 [==============================] - 16s - loss: 0.3254 - acc: 0.8977 - val_loss: 0.6867 - val_acc: 0.8128\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd88642710>"
      ]
     },
     "execution_count": 245,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Looks good - let's save those weights."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 246,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.save_weights(path+'models/da_conv8_1.h5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "### Pseudo labeling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "We're going to try using a combination of [pseudo labeling](http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf) and [knowledge distillation](https://arxiv.org/abs/1503.02531) to allow us to use unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without using the test set. At a later date we'll try using the test set."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "To do this, we simply calculate the predictions of our model..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 247,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "val_pseudo = bn_model.predict(conv_val_feat, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "...concatenate them with our training labels..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "comb_pseudo = np.concatenate([da_trn_labels, val_pseudo])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 256,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "comb_feat = np.concatenate([da_conv_feat, conv_val_feat])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "...and fine-tune our model using that data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 257,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.load_weights(path+'models/da_conv8_1.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 117154 samples, validate on 3478 samples\n",
      "Epoch 1/1\n",
      "117154/117154 [==============================] - 17s - loss: 0.3412 - acc: 0.8948 - val_loss: 0.7653 - val_acc: 0.8191\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd88642f50>"
      ]
     },
     "execution_count": 258,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=1, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 259,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 117154 samples, validate on 3478 samples\n",
      "Epoch 1/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.3237 - acc: 0.9008 - val_loss: 0.7536 - val_acc: 0.8229\n",
      "Epoch 2/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.3076 - acc: 0.9050 - val_loss: 0.7572 - val_acc: 0.8235\n",
      "Epoch 3/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2984 - acc: 0.9085 - val_loss: 0.7852 - val_acc: 0.8269\n",
      "Epoch 4/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2902 - acc: 0.9117 - val_loss: 0.7630 - val_acc: 0.8263\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd89bdd210>"
      ]
     },
     "execution_count": 259,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 260,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.optimizer.lr=0.00001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 261,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 117154 samples, validate on 3478 samples\n",
      "Epoch 1/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2837 - acc: 0.9134 - val_loss: 0.7901 - val_acc: 0.8200\n",
      "Epoch 2/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2760 - acc: 0.9155 - val_loss: 0.7648 - val_acc: 0.8275\n",
      "Epoch 3/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2723 - acc: 0.9183 - val_loss: 0.7382 - val_acc: 0.8358\n",
      "Epoch 4/4\n",
      "117154/117154 [==============================] - 17s - loss: 0.2657 - acc: 0.9191 - val_loss: 0.7227 - val_acc: 0.8329\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7fdd89bb2890>"
      ]
     },
     "execution_count": 261,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, \n",
    "             validation_data=(conv_val_feat, val_labels))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "That's a distinct improvement - even although the validation set isn't very big. This looks encouraging for when we try this on the test set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 262,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "bn_model.save_weights(path+'models/bn-ps8.h5')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Submit"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We'll find a good clipping amount using the validation set, prior to submitting."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 271,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def do_clip(arr, mx): return np.clip(arr, (1-mx)/9, mx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 282,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(0.6726388006592667)"
      ]
     },
     "execution_count": 282,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 283,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "conv_test_feat = load_array(path+'results/conv_test_feat.dat')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 284,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 285,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "subm = do_clip(preds,0.93)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "subm_name = path+'results/subm.gz'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 296,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "classes = sorted(batches.class_indices, key=batches.class_indices.get)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 301,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>img</th>\n",
       "      <th>c0</th>\n",
       "      <th>c1</th>\n",
       "      <th>c2</th>\n",
       "      <th>c3</th>\n",
       "      <th>c4</th>\n",
       "      <th>c5</th>\n",
       "      <th>c6</th>\n",
       "      <th>c7</th>\n",
       "      <th>c8</th>\n",
       "      <th>c9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>img_68347.jpg</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.093739</td>\n",
       "      <td>0.815874</td>\n",
       "      <td>0.079049</td>\n",
       "      <td>0.007778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>img_55725.jpg</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.930000</td>\n",
       "      <td>0.007778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>img_92799.jpg</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.930000</td>\n",
       "      <td>0.017918</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.009022</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>img_72170.jpg</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.363869</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.200521</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.425176</td>\n",
       "      <td>0.007778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>img_59154.jpg</td>\n",
       "      <td>0.695756</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.007778</td>\n",
       "      <td>0.047384</td>\n",
       "      <td>0.249183</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             img        c0        c1        c2        c3        c4        c5  \\\n",
       "0  img_68347.jpg  0.007778  0.007778  0.007778  0.007778  0.007778  0.007778   \n",
       "1  img_55725.jpg  0.007778  0.007778  0.007778  0.007778  0.007778  0.007778   \n",
       "2  img_92799.jpg  0.007778  0.930000  0.017918  0.007778  0.007778  0.007778   \n",
       "3  img_72170.jpg  0.007778  0.007778  0.363869  0.007778  0.007778  0.007778   \n",
       "4  img_59154.jpg  0.695756  0.007778  0.007778  0.007778  0.007778  0.007778   \n",
       "\n",
       "         c6        c7        c8        c9  \n",
       "0  0.093739  0.815874  0.079049  0.007778  \n",
       "1  0.007778  0.007778  0.930000  0.007778  \n",
       "2  0.009022  0.007778  0.007778  0.007778  \n",
       "3  0.200521  0.007778  0.425176  0.007778  \n",
       "4  0.007778  0.007778  0.047384  0.249183  "
      ]
     },
     "execution_count": 301,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "submission = pd.DataFrame(subm, columns=classes)\n",
    "submission.insert(0, 'img', [a[4:] for a in test_filenames])\n",
    "submission.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 307,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "submission.to_csv(subm_name, index=False, compression='gzip')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 308,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<a href='data/state/results/subm.gz' target='_blank'>data/state/results/subm.gz</a><br>"
      ],
      "text/plain": [
       "/data/jhoward/fast-image/nbs/data/state/results/subm.gz"
      ]
     },
     "execution_count": 308,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "FileLink(subm_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This gets 0.534 on the leaderboard."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## The \"things that didn't really work\" section"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "You can safely ignore everything from here on, because they didn't really help."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true,
    "hidden": true
   },
   "source": [
    "### Finetune some conv layers too"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "for l in get_bn_layers(p): conv_model.add(l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "for l1,l2 in zip(bn_model.layers, conv_model.layers[last_conv_idx+1:]):\n",
    "    l2.set_weights(l1.get_weights())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "for l in conv_model.layers: l.trainable =False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "for l in conv_model.layers[last_conv_idx+1:]: l.trainable =True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "comb = np.concatenate([trn, val])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "gen_t = image.ImageDataGenerator(rotation_range=8, height_shift_range=0.04, \n",
    "                shear_range=0.03, channel_shift_range=10, width_shift_range=0.08)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [
    {
     "ename": "Exception",
     "evalue": "X (images tensor) and y (labels) should have the same length. Found: X.shape = (22424, 3, 224, 224), y.shape = (98208, 10)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m-----------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mException\u001b[0m                       Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-38-8e21fbf7f6e7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mbatches\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgen_t\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mflow\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcomb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcomb_pseudo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py\u001b[0m in \u001b[0;36mflow\u001b[1;34m(self, X, y, batch_size, shuffle, seed, save_to_dir, save_prefix, save_format)\u001b[0m\n\u001b[0;32m    274\u001b[0m             \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbatch_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mshuffle\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mshuffle\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    275\u001b[0m             \u001b[0mdim_ordering\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdim_ordering\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 276\u001b[1;33m             save_to_dir=save_to_dir, save_prefix=save_prefix, save_format=save_format)\n\u001b[0m\u001b[0;32m    277\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    278\u001b[0m     def flow_from_directory(self, directory,\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/preprocessing/image.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, X, y, image_data_generator, batch_size, shuffle, seed, dim_ordering, save_to_dir, save_prefix, save_format)\u001b[0m\n\u001b[0;32m    473\u001b[0m             raise Exception('X (images tensor) and y (labels) '\n\u001b[0;32m    474\u001b[0m                             \u001b[1;34m'should have the same length. '\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 475\u001b[1;33m                             'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape))\n\u001b[0m\u001b[0;32m    476\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mdim_ordering\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'default'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    477\u001b[0m             \u001b[0mdim_ordering\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mK\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mimage_dim_ordering\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mException\u001b[0m: X (images tensor) and y (labels) should have the same length. Found: X.shape = (22424, 3, 224, 224), y.shape = (98208, 10)"
     ]
    }
   ],
   "source": [
    "batches = gen_t.flow(comb, comb_pseudo, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 3478 images belonging to 10 classes.\n"
     ]
    }
   ],
   "source": [
    "val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "conv_model.compile(Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/1\n",
      "22400/22424 [============================>.] - ETA: 0s - loss: 0.4348 - acc: 0.9200"
     ]
    },
    {
     "ename": "MemoryError",
     "evalue": "Error allocating 1644167168 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).\nApply node that caused the error: GpuAllocEmpty(Shape_i{0}.0, Shape_i{0}.0, Elemwise{Composite{(((i0 - i1) // i2) + i2)}}[(0, 1)].0, Elemwise{Composite{(((i0 - i1) // i2) + i2)}}[(0, 1)].0)\nToposort index: 157\nInputs types: [TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar)]\nInputs shapes: [(), (), (), ()]\nInputs strides: [(), (), (), ()]\nInputs values: [array(128), array(64), array(224), array(224)]\nOutputs clients: [[GpuDnnConv{algo='small', inplace=True}(GpuContiguous.0, GpuContiguous.0, GpuAllocEmpty.0, GpuDnnConvDesc{border_mode='valid', subsample=(1, 1), conv_mode='conv', precision='float32'}.0, Constant{1.0}, Constant{0.0})]]\n\nHINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.\nHINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m-----------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mMemoryError\u001b[0m                     Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-178-50c2f05dc6a4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m conv_model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, \n\u001b[1;32m----> 2\u001b[1;33m                  nb_val_samples=val_batches.N)\n\u001b[0m",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/models.pyc\u001b[0m in \u001b[0;36mfit_generator\u001b[1;34m(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, **kwargs)\u001b[0m\n\u001b[0;32m    872\u001b[0m                                         \u001b[0mmax_q_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_q_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    873\u001b[0m                                         \u001b[0mnb_worker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnb_worker\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 874\u001b[1;33m                                         pickle_safe=pickle_safe)\n\u001b[0m\u001b[0;32m    875\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    876\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mevaluate_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgenerator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mval_samples\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmax_q_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnb_worker\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpickle_safe\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36mfit_generator\u001b[1;34m(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe)\u001b[0m\n\u001b[0;32m   1469\u001b[0m                         val_outs = self.evaluate_generator(validation_data,\n\u001b[0;32m   1470\u001b[0m                                                            \u001b[0mnb_val_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1471\u001b[1;33m                                                            max_q_size=max_q_size)\n\u001b[0m\u001b[0;32m   1472\u001b[0m                     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1473\u001b[0m                         \u001b[1;31m# no need for try/except because\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36mevaluate_generator\u001b[1;34m(self, generator, val_samples, max_q_size, nb_worker, pickle_safe)\u001b[0m\n\u001b[0;32m   1552\u001b[0m                                 'or (x, y). Found: ' + str(generator_output))\n\u001b[0;32m   1553\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1554\u001b[1;33m                 \u001b[0mouts\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtest_on_batch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1555\u001b[0m             \u001b[1;32mexcept\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1556\u001b[0m                 \u001b[0m_stop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc\u001b[0m in \u001b[0;36mtest_on_batch\u001b[1;34m(self, x, y, sample_weight)\u001b[0m\n\u001b[0;32m   1257\u001b[0m             \u001b[0mins\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0msample_weights\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1258\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_test_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1259\u001b[1;33m         \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtest_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mins\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1260\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1261\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0moutputs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/keras/backend/theano_backend.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m    715\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    716\u001b[0m         \u001b[1;32massert\u001b[0m \u001b[0mtype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m{\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 717\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    718\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    719\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    869\u001b[0m                     \u001b[0mnode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnodes\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mposition_of_error\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    870\u001b[0m                     \u001b[0mthunk\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mthunk\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 871\u001b[1;33m                     storage_map=getattr(self.fn, 'storage_map', None))\n\u001b[0m\u001b[0;32m    872\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    873\u001b[0m                 \u001b[1;31m# old-style linkers raise their own exceptions\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/theano/gof/link.pyc\u001b[0m in \u001b[0;36mraise_with_op\u001b[1;34m(node, thunk, exc_info, storage_map)\u001b[0m\n\u001b[0;32m    312\u001b[0m         \u001b[1;31m# extra long error message in that case.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    313\u001b[0m         \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 314\u001b[1;33m     \u001b[0mreraise\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexc_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexc_value\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexc_trace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    315\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    316\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.pyc\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    857\u001b[0m         \u001b[0mt0_fn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    858\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 859\u001b[1;33m             \u001b[0moutputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    860\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    861\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'position_of_error'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mMemoryError\u001b[0m: Error allocating 1644167168 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).\nApply node that caused the error: GpuAllocEmpty(Shape_i{0}.0, Shape_i{0}.0, Elemwise{Composite{(((i0 - i1) // i2) + i2)}}[(0, 1)].0, Elemwise{Composite{(((i0 - i1) // i2) + i2)}}[(0, 1)].0)\nToposort index: 157\nInputs types: [TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar), TensorType(int64, scalar)]\nInputs shapes: [(), (), (), ()]\nInputs strides: [(), (), (), ()]\nInputs values: [array(128), array(64), array(224), array(224)]\nOutputs clients: [[GpuDnnConv{algo='small', inplace=True}(GpuContiguous.0, GpuContiguous.0, GpuAllocEmpty.0, GpuDnnConvDesc{border_mode='valid', subsample=(1, 1), conv_mode='conv', precision='float32'}.0, Constant{1.0}, Constant{0.0})]]\n\nHINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.\nHINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node."
     ]
    }
   ],
   "source": [
    "conv_model.fit_generator(batches, batches.N, nb_epoch=1, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "conv_model.optimizer.lr = 0.0001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "conv_model.fit_generator(batches, batches.N, nb_epoch=3, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "for l in conv_model.layers[16:]: l.trainable =True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "conv_model.optimizer.lr = 0.00001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "conv_model.fit_generator(batches, batches.N, nb_epoch=8, validation_data=val_batches, \n",
    "                 nb_val_samples=val_batches.N)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "conv_model.save_weights(path+'models/conv8_ps.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "conv_model.load_weights(path+'models/conv8_da.h5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": false,
    "hidden": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "val_pseudo = conv_model.predict(val, batch_size=batch_size*2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "save_array(path+'models/pseudo8_da.dat', val_pseudo)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "heading_collapsed": true,
    "hidden": true
   },
   "source": [
    "### Ensembling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>subject</th>\n",
       "      <th>classname</th>\n",
       "      <th>img</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>p002</td>\n",
       "      <td>c0</td>\n",
       "      <td>img_44733.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>p002</td>\n",
       "      <td>c0</td>\n",
       "      <td>img_72999.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>p002</td>\n",
       "      <td>c0</td>\n",
       "      <td>img_25094.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>p002</td>\n",
       "      <td>c0</td>\n",
       "      <td>img_69092.jpg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>p002</td>\n",
       "      <td>c0</td>\n",
       "      <td>img_92629.jpg</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  subject classname            img\n",
       "0    p002        c0  img_44733.jpg\n",
       "1    p002        c0  img_72999.jpg\n",
       "2    p002        c0  img_25094.jpg\n",
       "3    p002        c0  img_69092.jpg\n",
       "4    p002        c0  img_92629.jpg"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "drivers_ds = pd.read_csv(path+'driver_imgs_list.csv')\n",
    "drivers_ds.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "img2driver = drivers_ds.set_index('img')['subject'].to_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "driver2imgs = {k: g[\"img\"].tolist() \n",
    "               for k,g in drivers_ds[['subject', 'img']].groupby(\"subject\")}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "def get_idx(driver_list):\n",
    "    return [i for i,f in enumerate(filenames) if img2driver[f[3:]] in driver_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "drivers = driver2imgs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "rnd_drivers = np.random.permutation(drivers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "ds1 = rnd_drivers[:len(rnd_drivers)//2]\n",
    "ds2 = rnd_drivers[len(rnd_drivers)//2:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "models=[fit_conv([d]) for d in drivers]\n",
    "models=[m for m in models if m is not None]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [],
   "source": [
    "all_preds = np.stack([m.predict(conv_test_feat, batch_size=128) for m in models])\n",
    "avg_preds = all_preds.mean(axis=0)\n",
    "avg_preds = avg_preds/np.expand_dims(avg_preds.sum(axis=1), 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(0.9753041572894531)"
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keras.metrics.categorical_crossentropy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false,
    "hidden": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(0.6949396133422852, dtype=float32)"
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keras.metrics.categorical_accuracy(val_labels, np.clip(avg_val_preds,0.01,0.99)).eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "hidden": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  },
  "nav_menu": {},
  "nbpresent": {
   "slides": {
    "28b43202-5690-4169-9aca-6b9dabfeb3ec": {
     "id": "28b43202-5690-4169-9aca-6b9dabfeb3ec",
     "prev": null,
     "regions": {
      "3bba644a-cf4d-4a49-9fbd-e2554428cf9f": {
       "attrs": {
        "height": 0.8,
        "width": 0.8,
        "x": 0.1,
        "y": 0.1
       },
       "content": {
        "cell": "f3d3a388-7e2a-4151-9b50-c20498fceacc",
        "part": "whole"
       },
       "id": "3bba644a-cf4d-4a49-9fbd-e2554428cf9f"
      }
     }
    },
    "8104def2-4b68-44a0-8f1b-b03bf3b2a079": {
     "id": "8104def2-4b68-44a0-8f1b-b03bf3b2a079",
     "prev": "28b43202-5690-4169-9aca-6b9dabfeb3ec",
     "regions": {
      "7dded777-1ddf-4100-99ae-25cf1c15b575": {
       "attrs": {
        "height": 0.8,
        "width": 0.8,
        "x": 0.1,
        "y": 0.1
       },
       "content": {
        "cell": "fe47bd48-3414-4657-92e7-8b8d6cb0df00",
        "part": "whole"
       },
       "id": "7dded777-1ddf-4100-99ae-25cf1c15b575"
      }
     }
    }
   },
   "themes": {}
  },
  "toc": {
   "nav_menu": {
    "height": "148px",
    "width": "254px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 6,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
